'''
@Author: your name
@Date: 2020-05-19 09:11:10
@LastEditTime: 2020-05-19 19:00:02
@LastEditors: Please set LastEditors
@Description: Fetch a Lagou job-listing page (Shenzhen, test engineer) and extract each posting's fields into an xlwt worksheet
@FilePath: \Gitee\crawler_test\crawl_gif.py
'''
import requests
import re
import xlwt
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')


# Browser-like User-Agent sent with the request in place of the requests default.
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
}
url = "https://www.lagou.com/shenzhen-zhaopin/ceshigongchengshi/?labelWords=label"
# requests has no default timeout, so without one a stalled server would hang
# the script forever.
reps = requests.get(url, headers=header, timeout=10)
# Fail loudly on 4xx/5xx responses instead of scanning an error page for listings.
reps.raise_for_status()

# print(reps.text)

# Workbook that collects the results; the class is xlwt.Workbook
# (the previous spelling "Wordbook" does not exist in xlwt).
wordBook = xlwt.Workbook(encoding='utf-8')
workSheet = wordBook.add_sheet('res')
# Header labels, in output-column order: position name, company name,
# company location, salary, publish time.
colName = ['岗位名称', '公司名称', '公司地点', '薪资', '时间']
# Worksheet.write takes (row, column, value): the header lives in row 0 and
# each label goes into its own column.
for col, title in enumerate(colName):
    workSheet.write(0, col, title)
# Next free data row; row 0 is occupied by the header.
line = 1

# All job postings found in the response; re.S lets '.' span the line breaks
# inside each <li> element.
infoLines = re.findall(
    '<li class="con_list_item default_list(.*?)></li>', reps.text, re.S)

# One pattern per output column, in the same order as the header row.
# Compiled once here rather than on every loop iteration.
fieldPatterns = [
    re.compile('data-positionname="(.*?)"', re.S),       # 1. position name
    re.compile('data-company="(.*?)"', re.S),            # 2. company name
    re.compile('<span class="add">(.*?)</span>', re.S),  # 3. company location
    re.compile('data-salary="(.*?)"', re.S),             # 4. salary
]
# 5. publish time: a "format-time" span looked up inside the "p_top" block.
topBlockPattern = re.compile('<div class="p_top">(.*?)</div>', re.S)
timePattern = re.compile('<span class="format-time">(.*?)</span>', re.S)


def _first_match(pattern, text):
    """Return the first captured group of ``pattern`` in ``text``.

    The result is whitespace-stripped; an empty string is returned when the
    pattern does not match, so a missing field leaves a blank cell instead of
    raising IndexError.
    """
    found = pattern.search(text)
    return found.group(1).strip() if found else ''


# Walk every posting and write one spreadsheet row per posting.
for one in infoLines:
    values = [_first_match(pattern, one) for pattern in fieldPatterns]
    values.append(_first_match(timePattern, _first_match(topBlockPattern, one)))
    print(values)
    # Cells must receive plain strings (not the lists re.findall returns).
    for col, value in enumerate(values):
        workSheet.write(line, col, value)
    # Advance to the next row so postings do not overwrite each other.
    line += 1

# Persist the workbook; without this nothing is ever written to disk.
wordBook.save('lagou_jobs.xls')
